Clients want to remodel the house. They want to gather preliminary information about how much it costs, how long it takes and whether there are Seattle areas that are more expensive than others. Clients are interested in modification, or demolition, of their residential dwelling. Clients are interested in fairly recent projects, i.e., within last two years.
Clients do not have a clear picture how they plan to remodel their house. Hence, answering this question is more exploratory in its nature. My strategy is to analyze the data from a few angles to give clients the cost perspective on remodeling options.
Translating clients' expectations to the available data, the dataset needs to be filtered first by PermitClassMapped and the value Residential, then by PermitTypeDesc to values New, Demolition, Addition/Alteration, maybe Change of Use Only - No Construction, and Deconstruction.
I filter also by PermitTypeMapped and removed records with Grading and Roof which are about 100 records total.
Clients are interested in recent projects defined as applied on January 1, 2022, or later.
Clients have a $400K budget for remodeling their house. They are interested in projects with the price tag $2000+.
Clients are interested in a single family building.
# To render properly in HTML, set
# exporting via VS Code "Jupyter:Export to HTML". It results in showing the
# figures properly, but also the input.
#
# For exporting externally and hide the code-cells in the export file do:
# jupyter nbconvert --to html --no-input file.ipynb
import plotly.io as pio
pio.renderers.default='notebook'
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# for maps
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Instead of setting the cell to Markdown, create Markdown from withnin a code cell!
# We can just use python variable replacement syntax to make the text dynamic
from IPython.display import Markdown as md
# for file manipulation
import os.path
import os
# my routines
import myutils as ut
# set rows and columns to show all of it
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# set the width of the column with no limits
pd.set_option('display.max_colwidth', None)
# setup for seaborn plots
sns.set_style('darkgrid')
sns.set_palette('Set2')
# the style of the map
MAP_STYLE = "open-street-map"
#MAP_STYLE = "stamen-terrain"
# the path to the original data
DATA_PATH_ORG='input/Building_Permits_20231024_saved.zip'
# the path to the processed data
DATA_PATH_PROCESSED='input/Building_Permits_20231024_clean.zip'
# the path to auxilary files for analysis
DATA_PATH_EXTRA='extras'
# the file where the baseline cost dataframe is stored
DATA_PATH_COST='input/Building_Permits_20231024_cost.csv.zip'
# the path to the description of the fields
DESC_PATH_ORG='input/csv_descr.csv'
# messages for debugging
# the info message
IMSG = 'INFO'
# for filtering
# the minimal date to be considered by the clients
MIN_DATE = pd.to_datetime('2022-01-01')
MAX_DATE = pd.to_datetime('2023-10-24')
# the budget of the clients in US dollars
BUDGET=400000
MIN_BUDGET=2000
# print(f"{IMSG}: Current Working Dir: {os.getcwd()}")
df = ut.read_to_df(DATA_PATH_PROCESSED, False)
# new names of columns
NEW_CHANGE_OF_USE = 'Change of Use Only'
NEW_ECA = 'ECA Shoreline Exmpt/Str. Imprv Exc Req'
'''
Does some adjustment to the dataframe for better processing or better
presentation.
@param df_ the dataframe to be modified
@return df_ after changes
'''
def adjust_df(df_):
OLD_ECA = 'ECA and Shoreline Exemption/Street Improvement Exception Request'
print(f"{IMSG}: PermitTypeMapped: changing to {NEW_ECA} from {OLD_ECA}")
df_['PermitTypeMapped'] = df_['PermitTypeMapped'].str.replace(OLD_ECA, NEW_ECA)
OLD_CHANGE_OF_USE = 'Change of Use Only - No Construction'
print(f"{IMSG}: PermitTypeDesc: changing to '{NEW_CHANGE_OF_USE}' from '{OLD_CHANGE_OF_USE}'")
df_['PermitTypeDesc'] = df_['PermitTypeDesc'].str.replace(OLD_CHANGE_OF_USE, NEW_CHANGE_OF_USE)
df = df.loc[(df['AppliedDate'] >= MIN_DATE) & (df['PermitClassMapped'] == 'Residential')]
df = df.loc[df['PermitTypeMapped'].isin(['Building', 'Demolition'])]
df = df.loc[~df['PermitTypeDesc'].isin(['Temporary', NEW_CHANGE_OF_USE, 'Curb Cut','Relocation'])]
# filter by cost
df = df.loc[df['EstProjectCost'].notnull() & (df['EstProjectCost'] <= BUDGET) & (df['EstProjectCost'] > MIN_BUDGET)]
# filter by type of the building
df = df.loc[df['PermitClass'] == 'Single Family/Duplex']
adjust_df(df)
# so this is the last instruction, otherwise the previous one will block the GUI
print()
INFO: PermitTypeMapped: changing to ECA Shoreline Exmpt/Str. Imprv Exc Req from ECA and Shoreline Exemption/Street Improvement Exception Request INFO: PermitTypeDesc: changing to 'Change of Use Only' from 'Change of Use Only - No Construction'
# this is our base cost-tailored to the client's requirements
df_cost = df.copy()
ut.wrt_to_csv(df_cost, DATA_PATH_COST)
# updates should be done
'''
get min or max date from a frame
@param d_ a dataframe
@return max or min value
'''
get_extreme = lambda d, is_max=True : d['AppliedDate'].max().date() if is_max else d['AppliedDate'].min().date()
'''
Presents some info related to the cost dataframe
@param df_ The dataframe to be examined
@return None
'''
def get_cost_info(df_):
print(f"{IMSG}: Dataframe {df_.info()}")
print(df_.describe())
print(f"{IMSG}: PermitTypeMapped: {df_['PermitTypeMapped'].unique()}")
print(f"{IMSG}: PermitTypeDesc: {df_['PermitTypeDesc'].unique()}")
'''
Plot frequencies of given data
@param series_ what we want to present
@param title_ The title of the plot
@return the plot to be shown
'''
def plot_freqs(series_, title_=""):
return px.histogram(series_,
title=title_,
labels = { 'value': 'Frequency'},
text_auto = True,
#marginal='box'
).update_layout(showlegend = False)
get_cost_info(df_cost)
MIN_DATE = get_extreme(df_cost, is_max=False)
MAX_DATE = get_extreme(df_cost)
print(f"{IMSG}: The dataset contains projects from {MIN_DATE} to {MAX_DATE}.")
<class 'pandas.core.frame.DataFrame'>
Index: 6150 entries, 2 to 11411
Data columns (total 26 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 PermitNum 6150 non-null object
1 PermitClass 6150 non-null object
2 PermitClassMapped 6150 non-null object
3 PermitTypeMapped 6150 non-null object
4 PermitTypeDesc 6104 non-null object
5 Description 6150 non-null object
6 HousingUnits 3674 non-null float64
7 HousingUnitsRemoved 3674 non-null float64
8 HousingUnitsAdded 3674 non-null float64
9 EstProjectCost 6150 non-null float64
10 AppliedDate 6150 non-null datetime64[ns]
11 IssuedDate 5047 non-null datetime64[ns]
12 ExpiresDate 5047 non-null datetime64[ns]
13 CompletedDate 1273 non-null datetime64[ns]
14 StatusCurrent 6150 non-null object
15 RelatedMup 71 non-null object
16 OriginalAddress1 6144 non-null object
17 OriginalCity 5433 non-null object
18 OriginalState 5529 non-null object
19 OriginalZip 5431 non-null object
20 ContractorCompanyName 60 non-null object
21 Link 6150 non-null object
22 Latitude 6138 non-null float64
23 Longitude 6138 non-null float64
24 Location1 6138 non-null object
25 CostBinned 6150 non-null object
dtypes: datetime64[ns](4), float64(6), object(16)
memory usage: 1.3+ MB
INFO: Dataframe None
HousingUnits HousingUnitsRemoved HousingUnitsAdded EstProjectCost \
count 3674.000000 3674.000000 3674.000000 6150.000000
mean 0.725367 0.060697 0.544910 101120.437413
min 0.000000 0.000000 0.000000 2100.000000
25% 0.000000 0.000000 0.000000 20000.000000
50% 1.000000 0.000000 0.000000 64543.500000
75% 1.000000 0.000000 1.000000 164081.750000
max 12.000000 3.000000 54.000000 400000.000000
std 0.553654 0.249947 1.616944 99566.925136
AppliedDate IssuedDate \
count 6150 5047
mean 2022-11-06 21:39:44.780487424 2022-12-16 00:22:32.407370752
min 2022-01-02 00:00:00 2022-01-03 00:00:00
25% 2022-05-26 00:00:00 2022-07-13 12:00:00
50% 2022-10-21 00:00:00 2022-12-20 00:00:00
75% 2023-04-19 18:00:00 2023-05-23 00:00:00
max 2023-10-20 00:00:00 2023-10-20 00:00:00
std NaN NaN
ExpiresDate CompletedDate \
count 5047 1273
mean 2024-06-23 09:52:19.211412736 2023-03-17 23:24:55.993715456
min 2022-03-22 00:00:00 2022-01-27 00:00:00
25% 2024-01-20 12:00:00 2022-12-07 00:00:00
50% 2024-07-03 00:00:00 2023-04-18 00:00:00
75% 2024-11-30 00:00:00 2023-07-18 00:00:00
max 2025-05-18 00:00:00 2023-10-20 00:00:00
std NaN NaN
Latitude Longitude
count 6138.000000 6138.000000
mean 47.631981 -122.335701
min 47.497344 -122.419971
25% 47.569826 -122.373857
50% 47.646839 -122.334463
75% 47.684523 -122.297238
max 47.733952 -122.221100
std 0.063658 0.042690
INFO: PermitTypeMapped: ['Building' 'Demolition']
INFO: PermitTypeDesc: ['Addition/Alteration' 'New' 'Demolition' nan 'Deconstruction'
'Change of Use Only']
INFO: The dataset contains projects from 2022-01-02 to 2023-10-20.
md(f"After filtering, I ended up with {len(df_cost.index)} datapoints."
" In order to see what data is left, let's take a look at the histograms.")
After filtering, I ended up with 6150 datapoints. In order to see what data is left, let's take a look at the histograms.
# compute percentage
'''
compute percentage
@param x the part of the total
@param tot the total
@param prec The requested precision
@return the percentage
'''
perc = lambda x, tot, prec=2: round((x / tot) * 100.0, prec)
def get_info_cost(df_):
# returns the number of rows for the dataframe x
rows_count = lambda x: len(x.index)
# get column with a column name col and value val for dataframe dat
get_cols = lambda dat, col, val: dat[dat[col] == val]
total = rows_count(df_)
#rows_count_cat = lambda cat, val: df_
build_count = rows_count(get_cols(df_, 'PermitTypeMapped', 'Building'))
demo_count = rows_count(get_cols(df_, 'PermitTypeMapped', 'Demolition'))
print(f"{IMSG}: PermitTypeMapped: Building={perc(build_count, total)}%"
f" Demolition={perc(demo_count, total)}%")
plot_freqs(df_['PermitTypeMapped'],
f"The Frequency of Permit Type from {MIN_DATE} to {MAX_DATE}.").show()
px.pie(df_, names='PermitTypeMapped', hole=0.3, color='PermitTypeMapped',
title='PermitTypeMapped percentage').show()
plot_freqs(df_cost['PermitTypeDesc'],
f"The Frequency of Permit Type Description from {MIN_DATE} to {MAX_DATE}.").show()
px.pie(df_, names='PermitTypeDesc', hole=0.3, color='PermitTypeDesc',
title='PermitTypeDesc percentage').show()
get_info_cost(df_cost)
INFO: PermitTypeMapped: Building=96.02% Demolition=3.98%
Given clients' constraints we have two permit types PermitTypeMapped with the following percentage: 96% of Building and 4% of Demolition.
Analyzing just the category PermitTypeDesc, majority of projects regard Addition/Alteration (81%) and New (14.7%), followed by Demolition (3.19%).
Now, let's look at projects with respect to PermitTypeMapped, i.e., Building and Demolition.
'''
Shows the info by categories
@param df_ dataframe
@return
'''
def get_by_cat_info(df_):
x = df_[df_['PermitTypeMapped'] == 'Building']
px.pie(x, names='PermitTypeDesc', hole=0.3, color='PermitTypeDesc',
title='PermitTypeMapped=Building percentage').show()
x = df_[df_['PermitTypeMapped'] == 'Demolition']
px.pie(x, names='PermitTypeDesc', hole=0.3, color='PermitTypeDesc',
title='PermitTypeMapped=Demolition percentage').show()
y = x[ (x['PermitTypeMapped'] == 'Demolition') & x['PermitTypeDesc'].isnull()]
print(f"{IMSG}: A dataframe for the PermitTypeMapped == Demolition")
print(y[['PermitNum', 'Description']])
y = x[ (x['PermitTypeMapped'] == 'Demolition') & x['PermitTypeDesc'].notnull() & (x['PermitTypeDesc'] == 'Deconstruction')]
print(f"{IMSG}: A dataframe for the PermitTypeMapped == Demolition and PermitTypeDesc == Deconstruction")
print(y[['PermitNum', 'Description']])
get_by_cat_info(df_cost)
INFO: A dataframe for the PermitTypeMapped == Demolition
PermitNum \
435 6985058-DM
657 6965964-DM
1004 6968879-DM
1172 6975628-DM
1524 6963964-DM
1525 6963965-DM
1526 6963966-DM
1590 6972783-DM
1602 6948163-DM
1608 6974212-DM
1611 6974155-DM
1831 6963967-DM
1924 6967222-DM
2434 6949299-DM
2798 6932196-DM
3074 6955218-DM
3286 6898146-DM
3318 6955096-DM
3430 6951273-DM
3438 6953760-DM
3582 6932191-DM
4128 6945905-DM
4187 6942756-DM
4322 6943861-DM
4323 6943866-DM
4772 6938374-DM
4866 6932659-DM
5229 6935065-DM
5230 6935068-DM
5231 6935075-DM
5341 6933950-DM
6356 6914821-DM
6490 6921717-DM
6684 6920278-DM
6782 6874575-DM
6857 6918391-DM
7070 6916527-DM
7196 6915168-DM
7550 6864439-DM
7551 6864681-DM
7748 6908514-DM
8597 6834498-DM
10318 6886174-DM
10365 6885771-DM
11135 6878023-DM
11354 6875963-DM
Description
435 Demolish existing one family dwelling, per plan.
657 Demolish existing one family dwelling, per plan.
1004 Demolish existing one family dwelling, per plan.
1172 Demolish existing one family dwelling, per plan.
1524 Demolish existing structures, per plan.
1525 Demolish existing structures, per plan.
1526 Demolish existing structures, per plan.
1590 Demolition of a single family home, subject to field inspection.
1602 Demolish existing structure, subject to field inspection.
1608 DEMO existing single family residence, subject-to-field-inspection (STFI)
1611 DEMO existing structures on property, subject-to-field-inspection (STFI)
1831 Demolish existing structures, per plan.
1924 Demolition of existing structure.
2434 Demolish existing single family residence, subject-to-field-inspection (STFI).
2798 Demolish existing one family dwelling and accessory structure, subject to field inspection.
3074 Demolish existing SFR & detached garage, subject to field inspection [STFI]
3286 Demolition of existing one-family dwelling, per plan.
3318 Demolish Existing House to Foundation. Foundation and Partial Floor Deck to Remain, subject to field inspection, STFI.
3430 Demo one-family dwelling, per plan
3438 Demolish single family residence and accessory sheds, subject to field inspection.
3582 Demolish existing one-family dwelling, per plan.
4128 Demolish existing single family residence, per plan. Foundation to remain.
4187 Demolish existing structures, per plan.
4322 Demo existing SFR, subject to field inspection, STFI.
4323 Demo existing SFR, foundation to remain, subject to field inspection, STFI.
4772 Demolish existing residential building, per plans
4866 Demolish existing structures, subject to field inspection.
5229 Demo of existing single family residence, subject to field inspection, STFI.
5230 Demo of existing single family residence, subject to field inspection, STFI. .
5231 Demo existing single family residence, subject to field inspection, STFI.
5341 Demo Existing SFR, subject to field inspection, STFI.
6356 Remove existing garage for SFR, subject to field inspection, STFI. (No ground disturbance, slab to remain).
6490 Demolish Existing SFR, subject to field inspection, STFI.
6684 Demolition of Single Family structure and accessory structures, subject to field inspection, STFI.
6782 Demolition of existing 1 story house with basement.
6857 Demolish existing single family residence and detached garage (to foundation only, no below-grade work or excavation this permit), subject to field inspection
7070 Demolition of vacant single family dwelling, subject to field inspection, STFI.
7196 Demolish House and Detached Garage. Partial Foundation and Basement Slab to Remain, subject to field inspection STFI
7550 Demolish existing structures, per plan.
7551 Demolish existing structures, per plan.
7748 Demolish 1/2 of shared detached garage and carport accessory to single family residence, subject to field inspection.
8597 Demolish existing structures, per plan.
10318 Demolish existing single family residence and detached garage, subject to field inspection (STFI)
10365 Demolish single family residence, subject to field inspection.
11135 Demolish existing detached garage accessory to single family residence, subject to field inspection. Existing floor slab and surface parking space to remain.
11354 Demo of existing garage, subject to field inspection, STFI.
INFO: A dataframe for the PermitTypeMapped == Demolition and PermitTypeDesc == Deconstruction
PermitNum \
791 6980430-DM
2280 6966952-DM
8433 6903353-DM
Description
791 Demo one-family dwelling & detached accessory structure, per plan
2280 Demolish existing single family residence, subject to field inspection (STFI)
8433 DECONSTRUCTION OF EXISTING SINGLE FAMILY DWELLING, FOUNDATION TO REMAIN, SUBJECT TO FIELD INSPECTION, STFI.
In the Building category, Addition/Alteration accounts for 84.3%, and New accounts for 15.3%.
In the Demolition category, Demolition has the largest share 80%. Also after further examination (studying the Description), the null values concern demolition as well and they account for 18.8%. Similarly, Deconstruction, after studying the Description field, is in fact demolition of buildings; (the description specifies sometimes that foundations can stay.), and it accounts for 1.22%.
Since the clients are interested in the price, let's get some insights into the price factor. Let's start with the descriptive statistics.
'''
plots the estimated project costs in boxplots
@param df_ the dataframe
@param max_cost_ the cost
@param type_ the type of the permit
@param values_ (list) what type of values are we interested in
@return fig
'''
def plotbox_prices(df_, max_cost_, permit_type_, values_):
x = df_[df_['PermitTypeMapped'] == permit_type_]
for d in values_:
y = x[x['PermitTypeDesc'] == d ]
print( f"PermitTypeDesc={d}\n{y['EstProjectCost'].describe()}\n" )
#print(x[['Addition/Alteration', 'New', NEW_CHANGE_OF_USE]].describe())
fig = px.box(x,
x='PermitTypeDesc', y = 'EstProjectCost', points='all',
title=f"Estimated Project Cost in the {permit_type_} category <= ${max_cost_} in US dollars for the period from {MIN_DATE} to {MAX_DATE}.")
return fig
plotbox_prices(df_cost, BUDGET, 'Building', ['Addition/Alteration', 'New', NEW_CHANGE_OF_USE]).show()
PermitTypeDesc=Addition/Alteration count 4979.000000 mean 83098.893242 std 85187.403683 min 2100.000000 25% 17000.000000 50% 50000.000000 75% 129671.200000 max 400000.000000 Name: EstProjectCost, dtype: float64 PermitTypeDesc=New count 904.000000 mean 201425.087743 std 98682.663162 min 2847.000000 25% 145824.750000 50% 182796.500000 75% 275980.750000 max 400000.000000 Name: EstProjectCost, dtype: float64 PermitTypeDesc=Change of Use Only count 22.000000 mean 118519.363636 std 154616.291964 min 5000.000000 25% 9115.750000 50% 32500.000000 75% 123750.000000 max 400000.000000 Name: EstProjectCost, dtype: float64
The summary of estimated project costs in the Building category with respect to PermitTypeDesc is summarized in the table below:
| Permit Type | Median |
|---|---|
| Addition/Alteration | $17,000 |
| New | $182,797 |
| Change of Use Only | $32,500 |
We can see that the Addition/Alteration cost upper fence is around \$298K, and Change Of Use Only is \$125K. There are projects that estimated cost is \$0 or close to it and they should be removed from the analysis as they are simply the missing values.
md(f"There are {len(df_cost[df_cost['EstProjectCost'] <= 3000])} records with cost < $3000.")
There are 44 records with cost < $3000.
x = df_cost[df_cost['EstProjectCost'] <= 3000]
x[['PermitNum', 'PermitTypeMapped', 'AppliedDate', 'EstProjectCost', 'Description']]
| PermitNum | PermitTypeMapped | AppliedDate | EstProjectCost | Description | |
|---|---|---|---|---|---|
| 46 | 6978681-CN | Building | 2023-10-19 | 2700.0 | Construct pergola addition at roof level of 2-family dwelling, per plan. |
| 469 | 6981745-CN | Building | 2023-09-20 | 2500.0 | Construct alterations to roof of existing detached garage for one-family dwelling, per plan |
| 493 | 6986581-CN | Building | 2023-09-19 | 3000.0 | Construct addition and alterations to existing single family residence, per STFI |
| 546 | 6981613-CN | Building | 2023-09-15 | 3000.0 | Construct retaining wall, per plan. (Establish use as single family residence per land use code. Construct new single family residence and retaining wall, per plan. Review and processing for two records under 6922671-CN)Two parcels have the same owner. Address is required for this parcel. |
| 586 | 6979376-CN | Building | 2023-09-13 | 3000.0 | Construct seismic retrofit to existing single family residence, per Earthquake Home Retrofit plan set. |
| 685 | 6984709-CN | Building | 2023-09-07 | 3000.0 | Interior alterations to basement of existing single family residence, subject to field inspection (STFI) |
| 922 | 6982285-CN | Building | 2023-08-22 | 3000.0 | Deck addition to SFR, subject to field inspection, STFI. |
| 1179 | 6979374-CN | Building | 2023-08-04 | 3000.0 | Construct seismic retrofit to existing single family residence, per Earthquake Home Retrofit plan set |
| 1308 | 6973824-CN | Building | 2023-07-27 | 3000.0 | Voluntary seismic retrofit upgrade to crawlspace of SFR, per "Earthquake Home Retrofit" prescriptive plan set. |
| 1387 | 6976722-CN | Building | 2023-07-21 | 3000.0 | Voluntary engineered seismic retrofit for SFR, subject to field inspection, STFI. |
| 1550 | 6975128-CN | Building | 2023-07-12 | 3000.0 | Interior alterations to bathroom in SFR, subject to field inspection, STFI. |
| 1995 | 6970375-CN | Building | 2023-06-14 | 3000.0 | Alterations to existing single family residence, subject to field inspection |
| 2120 | 6957521-CN | Building | 2023-06-07 | 2500.0 | Allow new attached accessory dwelling unit to existing single family use per land use code. Construct alterations for a two family dwelling, per plan. |
| 2597 | 6947396-CN | Building | 2023-05-09 | 3000.0 | Allow attached accessory dwelling unit to existing single family residence per land use code. Construct alterations in the basement to create new two family dwelling, per plan. Basement remodel per separate permit 6859761-CN. |
| 3051 | 6958380-CN | Building | 2023-04-11 | 3000.0 | Engineered voluntary seismic retrofit for SFR, subject to field inspection, STFI. |
| 3224 | 6956245-CN | Building | 2023-03-30 | 3000.0 | Construct exterior and interior alterations to existing single family residence, per STFI |
| 4737 | 6939874-CN | Building | 2022-12-23 | 2500.0 | Addition of exterior bump out to SFR, subject to field inspection, STFI. |
| 5329 | 6929720-CN | Building | 2022-11-16 | 2500.0 | Repair and replace portions of existing deck on a single-family residence, subject to field inspection (STFI) |
| 5541 | 6931493-CN | Building | 2022-11-04 | 2500.0 | Construct alterations to remove bearing and non-bearing interior walls at main floor dining room within existing single family residence, subject to field inspection, (STFI). |
| 5604 | 6931373-CN | Building | 2022-11-01 | 3000.0 | Engineered voluntary seismic retrofit for SFR, subject to field inspection. STFI. |
| 5658 | 6920309-CN | Building | 2022-10-28 | 3000.0 | Construct alterations and install above ground swimming pool accessory to a single-family residence, per plan. |
| 6115 | 6925075-CN | Building | 2022-10-03 | 3000.0 | Voluntary seismic retrofit upgrade to basement of SFR, per "PROJECT IMPACT" plan set, (SEHR) permit. No earth disturbance. |
| 6395 | 6913916-CN | Building | 2022-09-18 | 2200.0 | Construct new pergola to existing single family residence, per plan. |
| 6558 | 6855301-CN | Building | 2022-09-07 | 3000.0 | Establish parking space and construct alterations to install retaining walls and parking slab accessory to single family residence, per plan. |
| 7427 | 6913126-CN | Building | 2022-07-25 | 3000.0 | Alterations to enlarge wall opening between living and dining rooms of single family residence, subject to field inspection. |
| 7515 | 6912376-CN | Building | 2022-07-20 | 2500.0 | Remove detached carport to a single-family residence, subject to field inspection. |
| 7807 | 6909770-CN | Building | 2022-07-07 | 3000.0 | Replace roof of garage at existing single family residence, subject to field inspection (STFI), |
| 8235 | 6905670-CN | Building | 2022-06-14 | 2500.0 | Construct alterations to Single Family Residence, subject-to-field inspection (STFI) |
| 8259 | 6905400-CN | Building | 2022-06-13 | 2500.0 | Dormer addition to SFR, subject to field inspection, STFI. |
| 8295 | 6896399-CN | Building | 2022-06-10 | 2103.0 | Construct alterations to existing deck and site work to existing single family residence, per plan. |
| 8422 | 6903584-CN | Building | 2022-06-03 | 3000.0 | Construction of new deck to existing single family residence, subject to field inspection. |
| 8466 | 6902382-CN | Building | 2022-06-02 | 2500.0 | Deck addition/ expansion for SFR, subject to field inspection, STFI. Revision to enlarge deck 2.24.2023. |
| 8549 | 6881268-CN | Building | 2022-05-31 | 2600.0 | Construct alterations to existing single family residence for 2 egress wells, subject to field inspection. |
| 9120 | 6897263-CN | Building | 2022-05-02 | 3000.0 | Subject-to-Field-Inspection STFI: Widening the doorway openings in three interior walls to allow a more usable space with better natural light. All spans/openings are located in interior walls and are under 10 feet.No change to the square footage of the home. |
| 9833 | 6870920-CN | Building | 2022-03-28 | 2163.0 | Construct second story screened deck addition for existing single family residence, per plan. |
| 9940 | 6861578-CN | Building | 2022-03-23 | 2847.0 | Construct retaining walls within yard of existing single-family residence, per plan. |
| 9956 | 6878526-CN | Building | 2022-03-22 | 2644.0 | Construct deck addition to existing single family residence, per plan |
| 10261 | 6881358-CN | Building | 2022-03-07 | 3000.0 | Establish attached accessory dwelling unit (AADU) and alterations to existing single family residence, per plan |
| 10532 | 6884430-CN | Building | 2022-02-22 | 3000.0 | Engineered seismic retrofit for SFR, subject to field inspection, STFI. |
| 10967 | 6879837-CN | Building | 2022-01-27 | 3000.0 | Earthquake retrofit for single family residence, per plan |
| 10978 | 6875872-CN | Building | 2022-01-26 | 2100.0 | Construct exterior stair from existing deck at NW corner of single family residence, per plan. |
| 11068 | 6875097-CN | Building | 2022-01-21 | 2150.0 | Construct second level deck addition to south side of existing townhouse residence, per plan. |
| 11135 | 6878023-DM | Demolition | 2022-01-18 | 2500.0 | Demolish existing detached garage accessory to single family residence, subject to field inspection. Existing floor slab and surface parking space to remain. |
| 11271 | 6876694-CN | Building | 2022-01-10 | 2400.0 | Construct exterior alterations to existing single family residence, per STFI |
Let's find out what kind of projects fall into a specific price range. The below code will generate data files grouped by the specific price range.
md(f"They files can be found in folder: {DATA_PATH_EXTRA}.")
They files can be found in folder: extras.
def study_cost(df_):
PRFX = 'estprojectcost'
# get the cost >= t_lo and < t_hi
def get_cost(df_, t_lo, t_hi):
x = df_[ (df_['EstProjectCost'] < t_hi) & (df_['EstProjectCost'] >= t_lo) ]
ut.wrt_to_csv(x[['PermitNum', 'PermitTypeMapped', 'AppliedDate', 'EstProjectCost', 'Description']],
f"{DATA_PATH_EXTRA}/{PRFX}-{t_lo}_{t_hi}.csv.zip")
get_csv = lambda fr, lo, hi, step : [get_cost(df_, t, t+step) for t in range(lo, hi, step)]
get_csv(df_, 0, 10000, 10000)
get_csv(df_, 10000, 50000, 40000)
get_csv(df_, 50000, BUDGET, 50000)
study_cost(df_cost)
plotbox_prices(df_cost, BUDGET, 'Demolition', ['Demolition', 'Deconstruction', 'Addition/Alteration']).show()
PermitTypeDesc=Demolition count 196.000000 mean 110107.282245 std 149263.144112 min 3500.000000 25% 5000.000000 50% 20000.000000 75% 225000.000000 max 400000.000000 Name: EstProjectCost, dtype: float64 PermitTypeDesc=Deconstruction count 3.000000 mean 51666.666667 std 32532.035493 min 20000.000000 25% 35000.000000 50% 50000.000000 75% 67500.000000 max 85000.000000 Name: EstProjectCost, dtype: float64 PermitTypeDesc=Addition/Alteration count 0.0 mean NaN std NaN min NaN 25% NaN 50% NaN 75% NaN max NaN Name: EstProjectCost, dtype: float64
The summary of estimated project costs in the Demolition category with respect to PermitTypeDesc is summarized in the table below:
| PermitType | Median |
|---|---|
| Demolition | $20,000 |
| Deconstruction | $50,000 |
The Demolition cost upper fence is \$400K; the Deconstruction cost upper fence is \85K.
In order to answer the question regarding the cost, I had to read manually descriptions of the projects. I could have run the automatic analysis by trying to expose the key words but for this exploratory project phase, the manual approach appeared more appropriate. The results are summarized below:
demolish deck; construct shed accessory; interior alteration to remove nonstructural wall in kitchen; add bathroom in basement, construct porch; construct front deck addition; construct new window; construct deck addition; construct second story screened deck addition; construct new skylights; construct uncovered front porch; replace existing deck; construct upper floor deck addition; replace existing front porch steps; construct storage shed; construct new attached carport; alterations to construct bathroom; construct covered front porch; remove existing garage; constructing garage; construct deck; construct new windows; shed addition to garage; Replacing interior and exterior stair guard rail from metal; hot tub addition; Replacing 9 windows; interior upper floor alteration to convert closet to bathroom in Bedroom #1; repair and replace existing front porch; construct two storage spaces inside attached garage; remove carport and construction detached accessory structure; Remove existing detached garage and construct addition of new detached garage accessory; alterations to replace deck on existing deck footings; conversion bedroom to bathroom; construct deck addition; replace windows; construct a detached prefab carport; deck renovation and construct covered pergola; new garage; bathroom addition to basement; shower addition; replace 9 windows; enlarge door for new pair of French doors; demolish a single-family residence
Deck addition; replace 15 window; construct single story addition; construct structural alterations and replace beam; Construct rooftop deck; demolish residential building/structures; Convert part of an existing bedroom (on the second floor) into a 3/4 bathroom; Construct detached home office accessory; Construct entry porch; Construct alterations to expand/add exterior decks and stairs; Enclose existing carport; Converting upstairs existing bedroom into a bathroom & walk in closet; Construct 80 foot addition and interior alterations; replace siding, replace and extend existing deck; construct alterations to existing garage to convert into accessory structure (heated office); roof replacement
Construct alterations to add an elevator; allow new attached accessory dwelling unit; install new modular one-family dwelling; construct deck addition and expand kitchen; convert attached garage to a single-family residence; construct detached garage with heated rec room; establish use as attached accessory dwelling unit (AADU); Construct rear yard attached deck with partial overhead roof covering and a detached, covered patio area; Allow new detached accessory dwelling unit; construct new pool/spa; Alterations to convert portion of existing attached garage to living space; Construct new one family dwelling; add Deatached Accessory Dwelling Unit (DADU); Construct one story addition; New detached, unconditioned garage to replace existing detached garage; convert closet to bathroom; 18 sf Dining addition, 324 sf Kitchen addition, 120 sf Bathroom addition, 131 sf Rear Porch addition, 49 sf Exterior stairs to basement; Construct single story addition;
Establish use and construct new DADU; construct single-family dwelling unit addition; construct 2-story addition to single family residence; construct bedroom and deck additions to single family residence and alterations; Covered deck addition (only); Construct laundry, bedroom and deck addition to single family residence and interior alterations; Construct new 2nd story and substantial alterations to basement and first floors; Construct a one-family dwelling (DADU 1); Build a New detached accessory dwelling unit; Establish use as attached accessory dwelling unit (AADU); Construct 2nd story dormer additions and substantial alterations; Construct 2-story addition; Construct 2nd story addition and alterations; Construct one-family dwelling; Construct 2nd story addition and substantial alterations.
Construct one-family dwelling; Construct a 2-story addition; Construct new DADU; Construct new one family dwelling.
Allow new detached accessory dwelling unit; Construct a one-family dwelling; Construct 2nd story addition and alterations; Construct alterations and deck addition; Allow an attached accessory dwelling unit; Allow Detached Accessory Dwelling Unit; Construct new one family dwelling; Change use to Duplex and construct addition and substantial alterations; Alterations to convert detached garage to studio accessory; Construct deck addition and substantial alterations; Construct 1-story addition, covered rear deck addition, and front porch addition; Construct two-family dwelling; Construct 2-story addition and substantial alterations; CONSTRUCT NEW FRONT PORCH AT NORTHEAST CORNER OF HOUSE. CONSTRUCT TWO-STORY ADDITION AT SOUTHWEST CORNER OF HOUSE AND EXPAND EXISTING DECK.
Establish 3 new townhouse units with AADUs and 5 new single family homes with AADUs; Construct middle two-family dwelling (bldg. 2); Allow new attached accessory dwelling unit; Construct 2-story addition to single family residence and substantial alterations; construct one-family dwelling; Construct second-story addition and substantial alterations; Construct new single family dwelling; Establish 1 single-family residence from 2 detached residences; Construct 2-story addition,
Construct substantial alterations and a deck; Construct alterations and additions, including deck addition; Construct two story addition and substantial alterations; Allow new detached accessory dwelling unit; Construct north one family dwelling; Construct new one family dwelling; Allow new attached accessory dwelling unit; Construct carport and exterior deck addition to single family residence and substantial alterations throughout basement and main floor; Construct west two-family dwelling; Construct east two-family dwelling.
Construct new 2nd story addition; Construct east one family dwelling; Construct one-family dwelling; Construct new two-family dwelling; Construct new two-family dwelling; Establish townhouse residence and construct as two family dwelling; Construct 1st and 2nd floor additions and substantial alterations; Construct 2-story addition w/ attached accessory dwelling unit (AADU); Construct new two family dwelling.
We have a few date points in our dataset: AppliedDate, IssuedDate, ExpiresDate, and CompletedDate. I will find out:
The dates might depend on the permit type and I will explore whether it is indeed a case.
We start with the Building category.
'''
Analyzes the dates
@param df_ The dataframe to get
@param permit_type_ A type of the permit
@param diff_name_ A name for a difference column
@param minuend_col_ A name of the column that we subtract from
@param subtrahend_col_ A name of the column that we subtract
@return
'''
def analyze_dates(df_, permit_type_, diff_name_, minuend_col_, subtrahend_col_):
x = df_[ (df_['PermitTypeMapped'] == permit_type_)
& df_['AppliedDate'].notna()
& df_['IssuedDate'].notna()
& df_['ExpiresDate'].notna()
& df_['CompletedDate'].notna()].copy()
print(f"In the {permit_type_} category we have: {len(x.index)} records.")
x[diff_name_] = (x[minuend_col_] - x[subtrahend_col_]).dt.days
# the box plot
fig = px.box(x, x='PermitTypeDesc', y = diff_name_, points='all',
title=f"Duration of the project in days in {permit_type_} category from {subtrahend_col_} till {minuend_col_}.")
fig.update_layout(yaxis_title=f"Proj Duration = {minuend_col_} - {subtrahend_col_} [Days]")
fig.show()
# the frequency plot regardless of permit type desc
#fig = plot_freqs(x['Compl-ApplDate'],
# f"The Frequency of Projects Duration in {permit_type_} category from application to completion.")
#fig.update_layout(xaxis_title='Days')
#fig.show()
perm_desc = x['PermitTypeDesc'].unique()
print(f"We have the following permit descriptions: perm_desc = {perm_desc}")
# plot the freq diagrams for each permit type description
for el in perm_desc:
y = x[x['PermitTypeDesc'] == el]
title = f"The Frequency of Projects Duration in {permit_type_} category for {el} from {subtrahend_col_} to {minuend_col_}."
fig = plot_freqs(y[diff_name_], title)
fig.update_layout(xaxis_title='Days')
fig.show()
print(f"Descriptive stat:\n{y[diff_name_].describe()}")
analyze_dates(df_cost, 'Building', 'Appl-ComplDate', 'CompletedDate', 'AppliedDate')
In the Building category we have: 1218 records.
We have the following permit descriptions: perm_desc = ['Addition/Alteration' 'New' 'Change of Use Only']
Descriptive stat: count 1162.000000 mean 221.358003 std 153.001385 min 0.000000 25% 99.000000 50% 202.000000 75% 334.000000 max 616.000000 Name: Appl-ComplDate, dtype: float64
Descriptive stat: count 50.00000 mean 301.58000 std 176.68548 min 9.00000 25% 147.00000 50% 325.50000 75% 436.50000 max 596.00000 Name: Appl-ComplDate, dtype: float64
Descriptive stat: count 6.000000 mean 202.000000 std 168.337756 min 14.000000 25% 66.000000 50% 198.000000 75% 295.500000 max 452.000000 Name: Appl-ComplDate, dtype: float64
In the Building category, we have as follows:
| Stat | Addition/Alteration | New | Change Of Use Only |
|---|---|---|---|
| Count | 1162 | 50 | 6 |
| Q3 | 334 | 438 | 316 |
| Median | 202 | 325.5 | 198 |
| Q1 | 99 | 146 | 34 |
Completion of the New Permit project takes 124 days longer than Addition/Alteration project if median is taken into account. Change Of Use Only can be considered as Addition/Alteration in terms of total project duration. The longest projects take ~600 days from application to completion.
Now, let's take a look at the Demolition category.
'''
Analyzes the dates for the demolition - There is NaN in the PermitTypeDesc
that messes upt analyze_dates() so I needed to customize it.
@param df_ The dataframe to get
@param permit_type_ A type of the permit
@param diff_name_ A name for a difference column
@param minuend_col_ A name of the column that we subtract from
@param subtrahend_col_ A name of the column that we subtract
@return
'''
def analyze_dates_demo(df_, permit_type_, diff_name_, minuend_col_, subtrahend_col_):
x = df_[ (df_['PermitTypeMapped'] == permit_type_)
& df_['AppliedDate'].notna()
& df_['IssuedDate'].notna()
& df_['ExpiresDate'].notna()
& df_['CompletedDate'].notna()].copy()
print(f"In the {permit_type_} category we have: {len(x.index)} records.")
x[diff_name_] = (x[minuend_col_] - x[subtrahend_col_]).dt.days
# the box plot
fig = px.box(x, x='PermitTypeDesc', y = diff_name_, points='all',
title=f"Duration of the project in days in {permit_type_} category from {subtrahend_col_} till {minuend_col_}.")
fig.update_layout(yaxis_title=f"Proj Duration = {minuend_col_} - {subtrahend_col_} [Days]")
fig.show()
# the frequency plot regardless of permit type desc
#fig = plot_freqs(x['Compl-ApplDate'],
# f"The Frequency of Projects Duration in {permit_type_} category from application to completion.")
#fig.update_layout(xaxis_title='Days')
#fig.show()
# for Demolition we have some Nan in PermitTypeDesc for 5 observations
# so I will not distinguish them and plot the frequency for all of them
#perm_desc = x['PermitTypeDesc'].unique()
#print(f"We have the following permit descriptions: perm_desc = {perm_desc}")
# plot the freq diagrams for each permit type description
title = f"The Frequency of Projects Duration in {permit_type_} category from {subtrahend_col_} to {minuend_col_}."
fig = plot_freqs(x[diff_name_], title)
fig.update_layout(xaxis_title='Days')
fig.show()
print(f"Descriptive stat:\n{x[diff_name_].describe()}")
analyze_dates_demo(df_cost, 'Demolition', 'Appl-ComplDate', 'CompletedDate', 'AppliedDate')
In the Demolition category we have: 55 records.
Descriptive stat: count 55.000000 mean 281.400000 std 138.181925 min 49.000000 25% 164.000000 50% 246.000000 75% 385.500000 max 553.000000 Name: Appl-ComplDate, dtype: float64
In the Demolition category the median of the project is 246 days, 42 days longer than Addition/Alteration and 80 shorter than New in the Building category.
analyze_dates(df_cost, 'Building', 'Issued-ComplDate', 'IssuedDate', 'AppliedDate')
analyze_dates_demo(df_cost, 'Demolition', 'Issued-ComplDate', 'IssuedDate', 'AppliedDate')
In the Building category we have: 1218 records.
We have the following permit descriptions: perm_desc = ['Addition/Alteration' 'New' 'Change of Use Only']
Descriptive stat: count 1162.000000 mean 30.061102 std 57.358211 min 0.000000 25% 1.000000 50% 3.000000 75% 34.000000 max 429.000000 Name: Issued-ComplDate, dtype: float64
Descriptive stat: count 50.000000 mean 61.300000 std 72.979799 min 0.000000 25% 1.000000 50% 29.000000 75% 116.250000 max 300.000000 Name: Issued-ComplDate, dtype: float64
Descriptive stat: count 6.000000 mean 55.333333 std 57.974707 min 0.000000 25% 5.500000 50% 48.500000 75% 87.750000 max 143.000000 Name: Issued-ComplDate, dtype: float64 In the Demolition category we have: 55 records.
Descriptive stat: count 55.000000 mean 53.054545 std 84.352966 min 0.000000 25% 5.000000 50% 21.000000 75% 48.500000 max 335.000000 Name: Issued-ComplDate, dtype: float64
The time to issue a permit from the application day is as follows:
| Stat | Addition/Alteration | New | Change Of Use Only | Demolition |
|---|---|---|---|---|
| Count | 1162 | 50 | 6 | 55 |
| Q3 | 34 | 117 | 88 | 49 |
| Median | 3 | 29 | 49 | 21 |
| Q1 | 1 | 1 | 6 | 5 |
The longest, in terms of median, takes to issue the Change Of Use Only permit. For the New permit, the median waiting time is 29 days, similar to the Demolition permit 21 days. The shortest waiting time in terms of median, is for Addition/Alteration and it takes 3 days to issue the permit.
'''
Modified analyze_dates(); to take care of one outlier
Analyzes the dates
@param df_ The dataframe to get
@param permit_type_ A type of the permit
@param diff_name_ A name for a difference column
@param minuend_col_ A name of the column that we subtract from
@param subtrahend_col_ A name of the column that we subtract
@return
'''
def mod_analyze_dates(df_, permit_type_, diff_name_, minuend_col_, subtrahend_col_):
x = df_[ (df_['PermitTypeMapped'] == permit_type_)
& df_['AppliedDate'].notna()
& df_['IssuedDate'].notna()
& df_['ExpiresDate'].notna()
& df_['CompletedDate'].notna()].copy()
x[diff_name_] = (x[minuend_col_] - x[subtrahend_col_]).dt.days
# there is an outlier that I want to eliminate to get better viz
THRESHOLD = 1000
analyze_dates(x[ x[diff_name_] < THRESHOLD ], 'Building', 'Expiry-IssuedDate', 'ExpiresDate', 'IssuedDate')
mod_analyze_dates(df_cost, 'Building', 'Expiry-IssuedDate', 'ExpiresDate', 'IssuedDate')
analyze_dates_demo(df_cost, 'Demolition', 'Expiry-IssuedDate', 'ExpiresDate', 'IssuedDate')
In the Building category we have: 1216 records.
We have the following permit descriptions: perm_desc = ['Addition/Alteration' 'New' 'Change of Use Only']
Descriptive stat: count 1160.000000 mean 548.151724 std 1.116701 min 546.000000 25% 547.000000 50% 548.000000 75% 549.000000 max 550.000000 Name: Expiry-IssuedDate, dtype: float64
Descriptive stat: count 50.000000 mean 548.320000 std 1.058301 min 546.000000 25% 548.000000 50% 548.000000 75% 549.000000 max 550.000000 Name: Expiry-IssuedDate, dtype: float64
Descriptive stat: count 6.000000 mean 548.166667 std 1.169045 min 547.000000 25% 547.250000 50% 548.000000 75% 548.750000 max 550.000000 Name: Expiry-IssuedDate, dtype: float64 In the Demolition category we have: 55 records.
Descriptive stat: count 55.000000 mean 548.181818 std 1.203250 min 546.000000 25% 547.000000 50% 548.000000 75% 549.000000 max 550.000000 Name: Expiry-IssuedDate, dtype: float64
The time to issue a permit from the application day is as follows:
| Stat | Addition/Alteration | New | Change Of Use Only | Demolition |
|---|---|---|---|---|
| Count | 1160 | 50 | 6 | 55 |
| Q3 | 549 | 549 | 548 | 549 |
| Median | 548 | 548 | 548 | 548 |
| Q1 | 547 | 548 | 547 | 547 |
The permit is valid 548 days from the issue date regardless of the permit type.
After visual examination of the maps that show locations of building permits, there are no clear tendencies that favor a specific Seattle region with respect to estimated project cost. The projects' locations show even scatter over the entire Seattle region. However, more in-depth exploration can reveal nuances and can be conducted upon client's request.
def plot_mapbox(df_):
#px.set_mapbox_access_token(open(".mapbox_token").read())
"""
f = px.scatter_mapbox(l_df, lat = "lat", lon = "lng",
center = dict (lat = MAP_CENTER_LAT, lon = MAP_CENTER_LNG), zoom = 8,
hover_name = "SiteName", hover_data = ["Locator", col_name_, class_name],
mapbox_style=MAP_STYLE, title = descr,
size = col_name_,
size_max = 20,
color = class_name,
#labels = { "legend" : {"-1" : "<40", "0" : "[40; 80)", "1" : ">=80"} },
#legend = {"-1" : "<40", "0" : "[40; 80)", "1" : ">=80"},
color_discrete_map = {
names[0] : "red",
names[1] : "yellow",
names[2] : "blue"
},
category_orders = {
class_name : [names[2], names[1], names[0]]
},
width = 800, height = 900)
"""
#print(df_.head())
title = f"Estimated Project Cost <= ${BUDGET} for Building Permits Issued By<BR>Seattle Department Of Construction and Inspections from {MIN_DATE} to {MAX_DATE}."
fig = px.scatter_mapbox(df_, lat="Latitude", lon="Longitude",
title = title,
color="EstProjectCost", size="EstProjectCost",
mapbox_style=MAP_STYLE,
hover_name = 'EstProjectCost',
# 'Description' sometimes is too long
hover_data = ['PermitNum', 'PermitTypeMapped', 'PermitTypeDesc'],
#color_continuous_scale= px.colors.sequential.Bluered,
color_continuous_scale= px.colors.sequential.Turbo,
#px.colors.cyclical.IceFire,
size_max=15, zoom=10,
width = 800, height = 900)
fig.show()
# binning is very coarsed-grained so I need to create a new bins
labels = ['(0, 10K]', '(10K, 50K]', '(50K, 100K]', '(100K, 200K]',
'(200K, 300K]', '(300K, 400K]']
df_['CostBinned2'] = pd.cut(
df_['EstProjectCost'],
[0, 1.0e+04, 5.0e+04, 1.0e+05, 2.0e+05, 3.0e+05, BUDGET],
BUDGET,
labels = labels,
include_lowest=True,
ordered = True
)
# print(df_['CostBinned2'].unique())
fig = px.scatter_mapbox(df_, lat="Latitude", lon="Longitude",
title = title,
color="CostBinned2",
mapbox_style=MAP_STYLE,
category_orders= {'CostBinned2' : labels },
hover_name = 'CostBinned2',
# 'Description' sometimes is too long
hover_data = ['EstProjectCost','PermitNum', 'PermitTypeMapped', 'PermitTypeDesc'],
#color_continuous_scale= px.colors.sequential.Bluered,
#color_continuous_scale= px.colors.sequential.Turbo,
color_discrete_sequence= px.colors.sequential.Inferno_r,
#px.colors.cyclical.IceFire,
size_max=15, zoom=10,
width = 800, height = 900)
fig.show()
plot_mapbox(df_cost)